Anomaly Detection
Table of Contents
Causes of Anomalies
Anomaly Detection
Applications of Anomaly Detection
Difficulties with Anomaly Detection
Use of Data Labels in Anomaly Detection
Output of Anomaly Detection
Variants of Anomaly Detection Problem
Univariate Gaussian Distribution
$\quad \;$ where $\bar x$ is the sample mean and $s^2$ is the sample variance ($s$ being the sample standard deviation).
Multivariate Gaussian Distribution
Pros and Cons
Convolutional Autoencoder (CAE)
Anomaly Score
Reconstruction Error
Root mean squared error (RMSE)
Import Library
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import random
Load MNIST Data
# Load the MNIST train/test splits through Keras and divide the pixel values
# by 255.0 so the images are stored as floats.
(train_imgs, train_labels), (test_imgs, test_labels) = tf.keras.datasets.mnist.load_data()
train_imgs = train_imgs / 255.0
test_imgs = test_imgs / 255.0

# Report the shape of every array; "x" refers to images and "y" to labels.
for tag, array in (
    ('x_train', train_imgs),
    ('y_train', train_labels),
    ('x_test', test_imgs),
    ('y_test', test_labels),
):
    print(f'shape of {tag}:', array.shape)
Separate Normal and Abnormal Data
# Digit 7 plays the role of the "normal" class and digit 5 the "abnormal" one.
# np.flatnonzero yields the 1-D positions where the label mask is True.
normal_train_index = np.flatnonzero(train_labels == 7)
normal_test_index = np.flatnonzero(test_labels == 7)
abnormal_test_index = np.flatnonzero(test_labels == 5)

# Select the matching images and add a trailing channel axis (N, 28, 28, 1),
# which is the layout the convolutional layers below are declared with.
normal_train_x = train_imgs[normal_train_index].reshape(-1, 28, 28, 1)
normal_test_x = test_imgs[normal_test_index].reshape(-1, 28, 28, 1)
abnormal_test_x = test_imgs[abnormal_test_index].reshape(-1, 28, 28, 1)

# Keep the labels alongside the images (each array holds a single digit value).
normal_train_y = train_labels[normal_train_index]
normal_test_y = test_labels[normal_test_index]
abnormal_test_y = test_labels[abnormal_test_index]

for tag, batch in (
    ('normal_train_x', normal_train_x),
    ('normal_test_x', normal_test_x),
    ('abnormal_test_x', abnormal_test_x),
):
    print(f'shape of {tag}:', batch.shape)
Plot Normal and Abnormal Data
def _show_image_row(images, picks, caption):
    """Draw ``images[p]`` for every ``p`` in ``picks`` side by side in one figure.

    Each panel is rendered in grayscale, titled ``caption`` and has its axes
    hidden.
    """
    plt.figure(figsize=(10, 4))
    for slot, pick in enumerate(picks, start=1):
        plt.subplot(1, len(picks), slot)
        plt.imshow(images[pick], 'gray')
        plt.title(caption)
        plt.axis('off')
    plt.tight_layout()
    plt.show()


# Normal samples: ten indices are drawn, and entries 5..8 of that draw are shown.
random.seed(6)
normal_rand_n = random.sample(range(normal_train_x.shape[0]), 10)
_show_image_row(normal_train_x, normal_rand_n[5:9], 'Normal')

# Abnormal samples: four indices are drawn and all of them are shown.
random.seed(11)
normal_rand_a = random.sample(range(abnormal_test_x.shape[0]), 4)
_show_image_row(abnormal_test_x, normal_rand_a, 'Abnormal')
Build a Model
# Encoder: maps a 28x28x1 image to a 1x1x2 latent code.
# Only the first layer declares `input_shape`; Sequential infers every later
# layer's input from the previous layer's output, so repeating the argument on
# inner layers has no effect and is replaced by the shape comments below.
encoder = tf.keras.models.Sequential([
    # 28x28x1 -> 14x14x32 (stride 2, SAME padding halves the spatial size)
    tf.keras.layers.Conv2D(filters=32,
                           kernel_size=(3, 3),
                           strides=(2, 2),
                           activation='relu',
                           padding='SAME',
                           input_shape=(28, 28, 1)),
    # 14x14x32 -> 7x7x64
    tf.keras.layers.Conv2D(filters=64,
                           kernel_size=(3, 3),
                           strides=(2, 2),
                           activation='relu',
                           padding='SAME'),
    # 7x7x64 -> 1x1x2: a 7x7 VALID convolution collapses the whole feature map
    # into two latent values; no activation, so the code is unbounded.
    tf.keras.layers.Conv2D(filters=2,
                           kernel_size=(7, 7),
                           padding='VALID'),
])
encoder.summary()
# Decoder: maps a 1x1x2 latent code back to a 28x28x1 image.
# Only the first layer declares `input_shape`; Sequential infers every later
# layer's input from the previous layer's output, so repeating the argument on
# inner layers has no effect and is replaced by the shape comments below.
decoder = tf.keras.models.Sequential([
    # 1x1x2 -> 7x7x64 (7x7 VALID transposed convolution, stride 1)
    tf.keras.layers.Conv2DTranspose(filters=64,
                                    kernel_size=(7, 7),
                                    strides=(1, 1),
                                    activation='relu',
                                    padding='VALID',
                                    input_shape=(1, 1, 2)),
    # 7x7x64 -> 14x14x32 (stride 2, SAME padding doubles the spatial size)
    tf.keras.layers.Conv2DTranspose(filters=32,
                                    kernel_size=(3, 3),
                                    strides=(2, 2),
                                    activation='relu',
                                    padding='SAME'),
    # 14x14x32 -> 28x28x1; no activation on the reconstructed image.
    tf.keras.layers.Conv2DTranspose(filters=1,
                                    kernel_size=(7, 7),
                                    strides=(2, 2),
                                    padding='SAME'),
])
decoder.summary()
# Wire the decoder onto the encoder's output to form the end-to-end autoencoder.
latent = encoder.output
result = decoder(latent)
model = tf.keras.models.Model(encoder.input, result)

# Train the network to reproduce its own input, using only the normal images,
# with mean squared error as the reconstruction loss.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x=normal_train_x, y=normal_train_x, epochs=10)
Look at Latent Space
# Seed NumPy's global generator as well: the index sampling below goes through
# np.random.choice, which random.seed() does not influence, so seeding only the
# `random` module left this plot non-reproducible. random.seed(2) is kept so any
# later use of the `random` module still sees the same state as before.
random.seed(2)
np.random.seed(2)

# Draw random subsets of the test images. np.random.choice samples with
# replacement by default, so an index may appear more than once.
idx_n = np.random.choice(normal_test_x.shape[0], 1000)
idx_a = np.random.choice(abnormal_test_x.shape[0], 50)
test_x_n, test_y_n = normal_test_x[idx_n], normal_test_y[idx_n]
test_x_a, test_y_a = abnormal_test_x[idx_a], abnormal_test_y[idx_a]

# Encode each subset and flatten the encoder output to one (z1, z2) pair per image.
normal_latent = encoder.predict(test_x_n).reshape(-1, 2)
abnormal_latent = encoder.predict(test_x_a).reshape(-1, 2)

# Scatter both groups in the two-dimensional latent space.
plt.figure(figsize=(8, 8))
plt.scatter(normal_latent[test_y_n == 7, 0], normal_latent[test_y_n == 7, 1], label='Normal 7')
plt.scatter(abnormal_latent[:, 0], abnormal_latent[:, 1], label='Abnormal')
plt.title('Latent Space', fontsize=15)
plt.xlabel('Z1', fontsize=15)
plt.ylabel('Z2', fontsize=15)
plt.legend(fontsize=15)
plt.show()
Test
def _show_reconstruction(original, rebuilt):
    """Plot an input image next to its reconstruction in a single figure."""
    plt.figure(figsize=(10, 8))
    panels = ((original, 'Input image'), (rebuilt, 'Reconstructed image'))
    for slot, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.imshow(picture, 'gray')
        plt.title(caption)
        plt.axis('off')
    plt.show()


# Normal: reconstruct the first normal test image and report the loss that
# model.evaluate returns for it.
normal_input = normal_test_x[0].reshape(-1, 28, 28, 1)
normal_recon = model.predict(normal_input)
n_recon_err = model.evaluate(normal_input, normal_input)
_show_reconstruction(normal_input[0], normal_recon[0])
print('Reconstruciton error: ', n_recon_err)

# Abnormal: same procedure for the first abnormal test image.
abnormal_input = abnormal_test_x[0].reshape(-1, 28, 28, 1)
abnormal_recon = model.predict(abnormal_input)
ab_recon_err = model.evaluate(abnormal_input, abnormal_input)
_show_reconstruction(abnormal_input[0], abnormal_recon[0])
print('Reconstruciton error: ', ab_recon_err)
Anomaly Detection
import scipy.stats as st  # not used in the visible code; kept in case later cells rely on `st`


def _reconstruction_errors(images):
    """Return the mean squared reconstruction error of each image as a list of floats.

    All images are reconstructed in one batched ``model.predict`` call instead
    of one ``model.evaluate`` call per image; the per-image mean over height,
    width and channel is the same quantity the 'mean_squared_error' loss
    reports for a single-image batch (up to floating-point rounding).
    """
    images = np.asarray(images, dtype=np.float32)
    rebuilt = model.predict(images, verbose=0)
    return np.mean(np.square(images - rebuilt), axis=(1, 2, 3)).tolist()


# Score the first `n_samples` normal and abnormal test images.
n_samples = 200
normal_err = _reconstruction_errors(normal_test_x[:n_samples])
abnormal_err = _reconstruction_errors(abnormal_test_x[:n_samples])

# Hand-picked decision boundary: errors above this line would be flagged as anomalies.
threshold = 0.05

plt.figure(figsize=(8, 6))
plt.plot(normal_err, '.', label='Normal')
plt.plot(abnormal_err, '.', label='Abnormal')
plt.xlabel('Data point index')
plt.ylabel('Reconstruction error')
plt.axhline(y=threshold, color='r', linestyle='-')
plt.legend()
plt.show()
%%javascript
// Fetch a third-party helper script over the network and run it in the notebook page.
// Judging by its file name it presumably builds the notebook's table of contents
// (TODO confirm against the script itself).
// NOTE(review): this runs remote code and needs network access; consider pinning or vendoring it.
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')